package org.spider.core.job;

import cn.hutool.core.date.DateUnit;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.spider.api.context.SpiderContext;
import org.spider.api.context.SpiderContextHolder;
import org.spider.api.domain.model.SpiderFlow;
import org.spider.api.domain.model.Task;
import org.spider.api.enums.CronEnableType;
import org.spider.api.utils.MyDateUtil;
import org.spider.core.Spider;
import org.spider.core.service.SpiderFlowService;
import org.spider.core.service.TaskService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.quartz.QuartzJobBean;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

@Component
public class SpiderJob extends QuartzJobBean {

    @Resource
    private Spider spider;
    @Resource
    TaskService taskService;
    @Resource
    private SpiderFlowService spiderFlowService;

    /** Workspace root where per-task log/output files are created (see SpiderJobContext.create). */
    @Value("${spider.workplace}")
    private String workspace;

    /**
     * Running tasks keyed by task id ({@code task.getId()}).
     * Must be a ConcurrentHashMap: jobs run on {@code Spider.executor} threads and on
     * Quartz scheduler threads, while {@link #stop(String)} is invoked from controller threads.
     */
    private static final Map<Integer, SpiderContext> contextMap = new ConcurrentHashMap<>();

    private static final Logger logger = LoggerFactory.getLogger(SpiderJob.class);

    /**
     * Entry point for controllers: runs the flow asynchronously on the shared spider executor.
     *
     * @param id       flow id to load from the database
     * @param taskName display name recorded on the created Task
     */
    public void runAsync(String id, String taskName) {
        Spider.executor.submit(() -> run(id, taskName));
    }

    /**
     * Loads the flow by id from the database, then builds the SpiderNode tree and executes it.
     * Logs a warning and returns if no flow exists for the given id (previously this NPE'd).
     *
     * @param id       flow id
     * @param taskName display name recorded on the created Task
     */
    public void run(String id, String taskName) {
        SpiderFlow flow = spiderFlowService.selectById(id);
        if (flow == null) {
            logger.warn("未找到爬虫流程,id:{}", id);
            return;
        }
        run(flow, taskName);
    }

    /**
     * Creates and persists a Task for the flow, updates the flow's last-execute time,
     * then executes the flow.
     *
     * @param spiderFlow the flow to execute (must be non-null with a valid id)
     * @param taskName   display name recorded on the created Task
     */
    public void run(SpiderFlow spiderFlow, String taskName) {
        Task task = new Task();
        task.setName(taskName);
        task.setFlowId(spiderFlow.getId());
        Date now = new Date();
        task.setBeginTime(now);
        spiderFlow.setLastExecuteTime(now);
        // save() assigns the generated task id, which run(flow, task) uses as the contextMap key
        taskService.save(task);
        spiderFlowService.updateLastExecuteTimeById(now, spiderFlow.getId());
        run(spiderFlow, task);
    }

    /**
     * Executes one task end to end.
     * <p>
     * SpiderContextHolder stores the context in thread scope so logback appenders can route
     * log output to the correct task context:
     * <ul>
     *   <li>task start: create context, register in {@code contextMap}, bind to the thread</li>
     *   <li>task end: close context, unregister, record the end time, unbind from the thread</li>
     * </ul>
     *
     * @param flow the flow definition to run
     * @param task the persisted task record (id already assigned by save)
     */
    public void run(SpiderFlow flow, Task task) {
        SpiderJobContext context = null;
        try {
            context = SpiderJobContext.create(this.workspace, flow.getId(), task.getId(), true);
            SpiderContextHolder.set(context);
            contextMap.put(task.getId(), context);
            logger.info("开始执行爬虫:{}", flow.getName());
            // the actual execution of the flow
            spider.run(flow, context);
            logger.info("爬虫{}执行完毕", flow.getName());
        } catch (Exception e) {
            logger.error("任务{}执行出错", flow.getName(), e);
        } finally {
            if (context != null) context.close();
            task.setEndTime(new Date());
            logger.info("任务{}状态更新", flow.getName());
            // only the end time is updated here
            taskService.saveOrUpdate(task);
            logger.info("context移除task id:{}", task.getId());
            contextMap.remove(task.getId());
            SpiderContextHolder.remove();
        }
    }

    /**
     * Requests a running task to stop.
     *
     * @param id string form of the task id (the Integer key used in {@code contextMap})
     * @throws NumberFormatException if {@code id} is not a valid integer
     */
    public void stop(String id) {
        // BUG FIX: contextMap is keyed by Integer (task.getId()), so looking it up with the
        // raw String could never match — the old code always got null and threw NPE.
        SpiderContext context = contextMap.get(Integer.valueOf(id));
        if (context == null) {
            logger.warn("未找到运行中的任务,task id:{}", id);
            return;
        }
        context.stop();
    }

    /**
     * Quartz callback, fired according to the cron previously registered for the flow.
     * Skips execution when the flow's cron has been disabled since scheduling.
     *
     * @param context Quartz execution context carrying the SpiderFlow in its merged job data map
     * @throws JobExecutionException never thrown directly; declared by the QuartzJobBean contract
     */
    @Override
    protected void executeInternal(JobExecutionContext context) throws JobExecutionException {
        // same pipeline as run(): flow -> SpiderNode tree -> execute
        JobDataMap jobDataMap = context.getMergedJobDataMap();
        SpiderFlow flow = (SpiderFlow) jobDataMap.get(SpiderJobManager.JOB_PARAM_NAME);
        if (CronEnableType.on == flow.getCronEnable()) {
            logger.info("当前定时执行任务:{} ,下次执行时间:{}",
                    flow.getName(), MyDateUtil.format(context.getNextFireTime()));
            logger.info("flow = {}", flow);
            run(flow, "定时执行");
        }
    }
}
